In [1]:
%matplotlib inline

import numpy as np
import pandas as pd

from sklearn import grid_search
from sklearn import metrics
from sklearn import cross_validation
from sklearn.externals import joblib

import xgboost as xgb

import matplotlib.pyplot as plt
import seaborn as sns

import operator
import itertools
import random
import os
import pickle
import time

PATHS


In [2]:
DATA_DIRECTORY = "E:\\eaglesense\\data\\topviewkinect"
PREPROCESSED_DIRECTORY = DATA_DIRECTORY + "\\all"
FEATURE_SET = "eval-chi2"

In [3]:
if not os.path.exists("results"):
    os.makedirs("results")

LOAD DATA


In [4]:
features_csv = "{root}/{tag}_features.csv".format(root=PREPROCESSED_DIRECTORY, tag=FEATURE_SET)
features_df = pd.read_csv(features_csv)

In [5]:
features_df.head()


Out[5]:
layer_area_0 layer_area_1 layer_area_2 layer_contours_0 layer_contours_1 layer_distance_0 layer_distance_1 layer_distance_2 layer_distance_3 layer_distance_4 ... interlayer_pos_16 interlayer_pos_17 extremities0 extreme_infrared_0 extreme_infrared_1 extreme_infrared_2 extreme_infrared_3 extreme_infrared_4 extreme_infrared_5 subject
0 0.297578 0.411765 0.290657 3.0 3.0 16.5529 26.6833 26.0192 26.6833 201.0 ... -26.0 -107.0 4.0 0.000000e+00 10.0 11.5 11.5 0.0 11.5 2001.0
1 0.310345 0.419238 0.270417 3.0 3.0 16.4012 26.4764 26.0192 26.4764 191.5 ... -26.0 -105.0 5.0 5.000000e-01 9.0 11.0 1.0 0.5 11.0 2001.0
2 0.333959 0.386492 0.279550 3.0 3.0 16.4012 26.2488 26.1725 26.2488 170.5 ... -25.0 -103.0 5.0 2.075076e-322 12.5 4.5 4.5 0.5 13.0 2001.0
3 0.348399 0.384181 0.267420 3.0 3.0 16.4012 26.4197 26.4764 26.4197 164.0 ... -25.0 -103.0 5.0 0.000000e+00 6.0 4.5 0.0 0.0 7.0 2001.0
4 0.356383 0.370567 0.273050 3.0 3.0 17.7200 27.4591 27.4591 27.4591 164.5 ... -26.0 -107.0 3.0 0.000000e+00 0.0 0.5 0.0 0.0 0.5 2001.0

5 rows × 73 columns


In [6]:
labels_csv = "{root}/{tag}_labels.csv".format(root=PREPROCESSED_DIRECTORY, tag=FEATURE_SET)
labels_df = pd.read_csv(labels_csv)

In [7]:
s1_data_path = "{root}/{tag}_s1_data.pickle".format(root=PREPROCESSED_DIRECTORY, tag=FEATURE_SET)
s2_data_path = "{root}/{tag}_s2_data.pickle".format(root=PREPROCESSED_DIRECTORY, tag=FEATURE_SET)
cs_data_path = "{root}/{tag}_cs_data.pickle".format(root=PREPROCESSED_DIRECTORY, tag=FEATURE_SET)
noinfrared_data_path = "{root}/{tag}_cs_noinfrared_data.pickle".format(root=PREPROCESSED_DIRECTORY, tag=FEATURE_SET)

with open(s1_data_path, "rb") as f:
    s1_data = pickle.load(f)
    
with open(s2_data_path, "rb") as f:
    s2_data = pickle.load(f)

with open(cs_data_path, "rb") as f:
    cs_data = pickle.load(f)

with open(noinfrared_data_path, "rb") as f:
    noinfrared_data = pickle.load(f)

In [8]:
unique_subjects = features_df["subject"].unique()
unique_subjects


Out[8]:
array([ 2001.,  2002.,  2003.,  2004.,  2005.,  2006.,  2007.,  2008.,
        2009.,  2010.,  2011.,  2012.])

In [9]:
ACTIVITIES = ["Standing", "Sitting", "Pointing", "Phone", "Tablet", "Paper"]

In [10]:
num_activities = len(ACTIVITIES)
num_activities


Out[10]:
6

PARAMETERS


In [11]:
XGB_PARAM_FINAL = {}
XGB_PARAM_FINAL["eta"] = 0.3
XGB_PARAM_FINAL["gamma"] = 1
XGB_PARAM_FINAL["lambda"] = 1
XGB_PARAM_FINAL["alpha"] = 0
XGB_PARAM_FINAL["max_depth"] = 6
XGB_PARAM_FINAL["colsample_bytree"] = 0.5
XGB_PARAM_FINAL["colsample_bylevel"] = 0.5
XGB_PARAM_FINAL["subsample"] = 0.5
XGB_PARAM_FINAL["objective"] = "multi:softmax"
XGB_PARAM_FINAL["eval_metric"] = "merror"
XGB_PARAM_FINAL["num_class"] = len(ACTIVITIES)
XGB_PARAM_FINAL["silent"] = 0
XGB_NUM_ROUNDS = 200
XGB_EARLYSTOPPING_ROUNDS = 30

UTILITY


In [12]:
def crosssubject_test_split(features_df, labels_df, training_subjects_ids):
    num_features = features_df.shape[1] - 1
    
    X_train = np.array([], dtype=np.float64).reshape(0, num_features)
    y_train = np.array([], dtype=np.int32).reshape(0, 1)
    X_test = np.array([], dtype=np.float64).reshape(0, num_features)
    y_test = np.array([], dtype=np.int32).reshape(0, 1)

    for subject_id in unique_subjects:
        subject_features = features_df[features_df["subject"] == subject_id]
        subject_features = subject_features.drop(["subject"], axis=1)
        subject_labels = labels_df[labels_df["subject"] == subject_id]
        subject_labels = subject_labels[["activity"]]
        subject_X = subject_features.values
        subject_y = subject_labels.values

        if subject_id in training_subjects_ids:
            X_train = np.vstack([X_train, subject_X])
            y_train = np.vstack([y_train, subject_y])
        else:
            X_test = np.vstack([X_test, subject_X])
            y_test = np.vstack([y_test, subject_y])
    
    return X_train, y_train, X_test, y_test

In [13]:
def get_normalized_confusion_matrix(y_true, y_predicted):
    confusion_matrix = metrics.confusion_matrix(y_true, y_predicted)
    confusion_matrix_normalized = confusion_matrix.astype("float") / confusion_matrix.sum(axis=1)[:, np.newaxis]
    confusion_matrix_normalized *= 100
    return confusion_matrix_normalized

Samples Test 1


In [14]:
s1_X_train = s1_data["X_train"]
s1_y_train = s1_data["y_train"]
s1_X_test = s1_data["X_test"]
s1_y_test = s1_data["y_test"]

In [15]:
s1_X_train.shape


Out[15]:
(25653, 72)

In [16]:
s1_X_test.shape


Out[16]:
(51371, 72)

In [17]:
s1_train_xgbmatrix = xgb.DMatrix(s1_X_train, s1_y_train)
s1_test_xgbmatrix = xgb.DMatrix(s1_X_test, s1_y_test)
s1_watchlist = [(s1_train_xgbmatrix, "train"), (s1_test_xgbmatrix, "eval")]

In [18]:
s1_eval_results = {}
s1_validation = xgb.train(params=XGB_PARAM_FINAL, dtrain=s1_train_xgbmatrix, evals=s1_watchlist, evals_result=s1_eval_results,
                          num_boost_round=XGB_NUM_ROUNDS, early_stopping_rounds=XGB_EARLYSTOPPING_ROUNDS, verbose_eval=100)


[0]	train-merror:0.078509	eval-merror:0.086333
Multiple eval metrics have been passed: 'eval-merror' will be used for early stopping.

Will train until eval-merror hasn't improved in 30 rounds.
[100]	train-merror:0.000936	eval-merror:0.015573

In [19]:
s1_booster = xgb.train(params=XGB_PARAM_FINAL, dtrain=s1_train_xgbmatrix, num_boost_round=s1_validation.best_iteration+1)

In [20]:
s1_y_predicted = s1_booster.predict(s1_test_xgbmatrix)

In [21]:
s1_accuracy = metrics.accuracy_score(s1_y_test, s1_y_predicted)
s1_accuracy


Out[21]:
0.98473847112183921

In [22]:
s1_confusion_matrix = get_normalized_confusion_matrix(s1_y_test, s1_y_predicted)

In [23]:
s1_results_dump = {
    "eval_results": s1_eval_results,
    "eval_earlystoppping_best_iteration": s1_validation.best_iteration+1,
    "eval_earlystoppping_best_score": s1_validation.best_score,
    "classifier": s1_booster,
    "final_accuracy": s1_accuracy,
    "final_confusion_matrix": s1_confusion_matrix
}

with open("results/s1.pickle", "wb") as f:
    pickle.dump(s1_results_dump, f)

Samples Test 2


In [24]:
s2_X_train = s2_data["X_train"]
s2_y_train = s2_data["y_train"]
s2_X_test = s2_data["X_test"]
s2_y_test = s2_data["y_test"]

In [25]:
s2_X_train.shape


Out[25]:
(51324, 72)

In [26]:
s2_X_test.shape


Out[26]:
(25700, 72)

In [27]:
s2_train_xgbmatrix = xgb.DMatrix(s2_X_train, s2_y_train)
s2_test_xgbmatrix = xgb.DMatrix(s2_X_test, s2_y_test)
s2_watchlist = [(s2_train_xgbmatrix, "train"), (s2_test_xgbmatrix, "eval")]

In [28]:
s2_eval_results = {}
s2_validation = xgb.train(params=XGB_PARAM_FINAL, dtrain=s2_train_xgbmatrix, evals=s2_watchlist, evals_result=s2_eval_results, 
                          num_boost_round=XGB_NUM_ROUNDS, early_stopping_rounds=XGB_EARLYSTOPPING_ROUNDS, verbose_eval=100)


[0]	train-merror:0.076085	eval-merror:0.081556
Multiple eval metrics have been passed: 'eval-merror' will be used for early stopping.

Will train until eval-merror hasn't improved in 30 rounds.
[100]	train-merror:0.001013	eval-merror:0.011206

In [29]:
s2_booster = xgb.train(params=XGB_PARAM_FINAL, dtrain=s2_train_xgbmatrix, num_boost_round=s2_validation.best_iteration+1)

In [30]:
s2_y_predicted = s2_booster.predict(s2_test_xgbmatrix)

In [31]:
s2_accuracy = metrics.accuracy_score(s2_y_test, s2_y_predicted)
s2_accuracy


Out[31]:
0.98972762645914392

In [32]:
s2_confusion_matrix = get_normalized_confusion_matrix(s2_y_test, s2_y_predicted)

In [33]:
s2_results_dump = {
    "eval_results": s2_eval_results,
    "eval_earlystoppping_best_iteration": s2_validation.best_iteration+1,
    "eval_earlystoppping_best_score": s2_validation.best_score,
    "classifier": s2_booster,
    "final_accuracy": s2_accuracy,
    "final_confusion_matrix": s2_confusion_matrix
}

with open("results/s2.pickle", "wb") as f:
    pickle.dump(s2_results_dump, f)

Cross-subject Test 1


In [14]:
cs_X_train = cs_data["X_train"]
cs_y_train = cs_data["y_train"]
cs_X_test = cs_data["X_test"]
cs_y_test = cs_data["y_test"]

In [16]:
cs_X_train.shape


Out[16]:
(34945, 72)

In [17]:
cs_X_test.shape


Out[17]:
(42079, 72)

RANDOM FOREST


In [37]:
from sklearn import ensemble

In [38]:
rf_clf = ensemble.RandomForestClassifier(n_estimators=100, criterion="entropy", max_depth=None, max_features="sqrt", 
                                         random_state=42, n_jobs=-1)

In [39]:
rf_training_start = time.time()
rf_clf.fit(cs_X_train, cs_y_train.ravel())
rf_training_time = (time.time() - rf_training_start)
rf_training_time


Out[39]:
2.5243890285491943

In [40]:
rf_testing_start = time.time()
rf_y_predicted = rf_clf.predict(cs_X_test)
rf_testing_time = (time.time() - rf_testing_start)
rf_testing_time


Out[40]:
0.2706270217895508

In [41]:
rf_y_train_predicted = rf_clf.predict(cs_X_train)
rf_train_accuracy = metrics.accuracy_score(cs_y_train, rf_y_train_predicted)
rf_train_accuracy


Out[41]:
1.0

In [42]:
rf_accuracy = metrics.accuracy_score(cs_y_test, rf_y_predicted)
rf_accuracy


Out[42]:
0.84438793697568859

In [43]:
rf_confusion_matrix = get_normalized_confusion_matrix(cs_y_test, rf_y_predicted)

In [44]:
rf_results_dump = {
    "training_time": rf_training_time,
    "testing_time": rf_testing_time,
    "training_accuracy": rf_train_accuracy,
    "final_accuracy": rf_accuracy,
    "final_confusion_matrix": rf_confusion_matrix
}

with open("results/cs_rf.pickle", "wb") as f:
    pickle.dump(rf_results_dump, f)

XGBOOST


In [53]:
cs_X_train.shape


Out[53]:
(34945, 72)

In [54]:
cs_X_test.shape


Out[54]:
(42079, 72)

In [20]:
cs_train_xgbmatrix = xgb.DMatrix(cs_X_train, cs_y_train)
cs_test_xgbmatrix = xgb.DMatrix(cs_X_test, cs_y_test)
cs_watchlist = [(cs_train_xgbmatrix, "train"), (cs_test_xgbmatrix, "eval")]

In [21]:
cs_eval_results = {}
cs_validation = xgb.train(params=XGB_PARAM_FINAL, dtrain=cs_train_xgbmatrix, evals=cs_watchlist, evals_result=cs_eval_results,
                          num_boost_round=XGB_NUM_ROUNDS, early_stopping_rounds=XGB_EARLYSTOPPING_ROUNDS, verbose_eval=100)


[0]	train-merror:0.053112	eval-merror:0.282588
Multiple eval metrics have been passed: 'eval-merror' will be used for early stopping.

Will train until eval-merror hasn't improved in 30 rounds.
[100]	train-merror:0.000601	eval-merror:0.095891
Stopping. Best iteration:
[78]	train-merror:0.000916	eval-merror:0.094536


In [22]:
xgboost_training_start = time.time()
cs_booster = xgb.train(params=XGB_PARAM_FINAL, dtrain=cs_train_xgbmatrix, num_boost_round=cs_validation.best_iteration+1)
xgboost_training_time = (time.time() - xgboost_training_start)
xgboost_training_time


Out[22]:
7.399036884307861

In [55]:
total_time = list()

In [ ]:
# training
for i in range(cs_X_train.shape[0]):
    x = cs_X_train[i,:]
    x = x.reshape((1, 72))
    x_dmatrix = xgb.DMatrix(x)
    start = time.time()
    cs_booster.predict(x_dmatrix)
    total_time.append(time.time() - start)

# testing
for i in range(cs_X_test.shape[0]):
    x = cs_X_test[i,:]
    x = x.reshape((1, 72))
    x_dmatrix = xgb.DMatrix(x)
    start = time.time()
    cs_booster.predict(x_dmatrix)
    total_time.append(time.time() - start)

In [ ]:
avg_time = np.mean(total_time)

In [ ]:
avg_time * 1000

In [ ]:
std_time = np.std(total_time)

In [ ]:
std_time * 1000

In [50]:
xgboost_testing_start = time.time()
cs_y_predicted = cs_booster.predict(cs_test_xgbmatrix)
xgboost_testing_time = (time.time() - xgboost_testing_start)
xgboost_testing_time


Out[50]:
0.2311539649963379

In [51]:
cs_y_train_predicted = rf_clf.predict(cs_X_train)
cs_train_accuracy = metrics.accuracy_score(cs_y_train, cs_y_train_predicted)
cs_train_accuracy


Out[51]:
1.0

In [52]:
cs_accuracy = metrics.accuracy_score(cs_y_test, cs_y_predicted)
cs_accuracy


Out[52]:
0.90546353287863302

In [53]:
cs_confusion_matrix = get_normalized_confusion_matrix(cs_y_test, cs_y_predicted)

In [54]:
cs_confusion_matrix_subjects = list()

for subject_id in unique_subjects:
    subject_features = features_df[features_df["subject"] == subject_id]
    subject_features = subject_features.drop(["subject"], axis=1)
    subject_labels = labels_df[labels_df["subject"] == subject_id]
    subject_labels = subject_labels[["activity"]]
    subject_X = subject_features.values
    subject_y = subject_labels.values

    subject_xgbmatrix = xgb.DMatrix(subject_X, subject_y)
    subject_y_predicted = cs_booster.predict(subject_xgbmatrix)
    
    subject_accuracy = metrics.accuracy_score(subject_y, subject_y_predicted)
    subject_confusion_matrix = get_normalized_confusion_matrix(subject_y, subject_y_predicted)
    cs_confusion_matrix_subjects.append((subject_id, subject_accuracy, subject_confusion_matrix))

In [55]:
for activity_idx, activity in enumerate(ACTIVITIES):
    activity_accuracy = cs_confusion_matrix[activity_idx, activity_idx]
    activity_error = 100 - activity_accuracy
    print(activity, "\tAccuracy:", activity_accuracy, "\tError:", activity_error)


Standing 	Accuracy: 96.5608835454 	Error: 3.43911645463
Sitting 	Accuracy: 98.2380659701 	Error: 1.76193402991
Pointing 	Accuracy: 92.987612849 	Error: 7.01238715096
Phone 	Accuracy: 63.8150093173 	Error: 36.1849906827
Tablet 	Accuracy: 93.1763766959 	Error: 6.82362330407
Paper 	Accuracy: 91.7335243553 	Error: 8.2664756447

In [56]:
cs_results_dump = {
    "training_time": xgboost_training_time,
    "testing_time": xgboost_testing_time,
    "eval_results": cs_eval_results,
    "eval_earlystoppping_best_iteration": cs_validation.best_iteration+1,
    "eval_earlystoppping_best_score": cs_validation.best_score,
    "classifier": cs_booster,
    "training_accuracy": cs_train_accuracy,
    "final_accuracy": cs_accuracy,
    "final_confusion_matrix": cs_confusion_matrix,
    "subject_confusion_matrix": cs_confusion_matrix_subjects
}

with open("results/cs.pickle", "wb") as f:
    pickle.dump(cs_results_dump, f)

NO INFRARED


In [57]:
noinfrared_X_train = noinfrared_data["X_train"]
noinfrared_y_train = noinfrared_data["y_train"]
noinfrared_X_test = noinfrared_data["X_test"]
noinfrared_y_test = noinfrared_data["y_test"]

In [58]:
noinfrared_X_train.shape


Out[58]:
(34945, 66)

In [59]:
noinfrared_X_test.shape


Out[59]:
(42079, 66)

In [60]:
noinfrared_train_xgbmatrix = xgb.DMatrix(noinfrared_X_train, noinfrared_y_train)
noinfrared_test_xgbmatrix = xgb.DMatrix(noinfrared_X_test, noinfrared_y_test)
noinfrared_watchlist = [(noinfrared_train_xgbmatrix, "train"), (noinfrared_test_xgbmatrix, "eval")]

In [61]:
noinfrared_eval_results = {}
noinfrared_validation = xgb.train(params=XGB_PARAM_FINAL, dtrain=noinfrared_train_xgbmatrix, evals=noinfrared_watchlist,
                                  evals_result=noinfrared_eval_results, num_boost_round=XGB_NUM_ROUNDS, 
                                  early_stopping_rounds=XGB_EARLYSTOPPING_ROUNDS, verbose_eval=100)


[0]	train-merror:0.069166	eval-merror:0.290644
Multiple eval metrics have been passed: 'eval-merror' will be used for early stopping.

Will train until eval-merror hasn't improved in 30 rounds.
Stopping. Best iteration:
[45]	train-merror:0.002318	eval-merror:0.173507


In [62]:
noinfrared_booster = xgb.train(params=XGB_PARAM_FINAL, dtrain=noinfrared_train_xgbmatrix,
                               num_boost_round=noinfrared_validation.best_iteration+1)

In [63]:
noinfrared_y_predicted = noinfrared_booster.predict(noinfrared_test_xgbmatrix)

In [64]:
noinfrared_accuracy = metrics.accuracy_score(noinfrared_y_test, noinfrared_y_predicted)
noinfrared_accuracy


Out[64]:
0.82649302502435895

In [65]:
noinfrared_confusion_matrix = get_normalized_confusion_matrix(noinfrared_y_test, noinfrared_y_predicted)

In [66]:
noninfrared_results_dump = {
    "eval_results": noinfrared_eval_results,
    "eval_earlystoppping_best_iteration": noinfrared_validation.best_iteration+1,
    "eval_earlystoppping_best_score": noinfrared_validation.best_score,
    "classifier": noinfrared_booster,
    "final_accuracy": noinfrared_accuracy,
    "final_confusion_matrix": noinfrared_confusion_matrix,
}

with open("results/cs_noinfrared.pickle", "wb") as f:
    pickle.dump(noninfrared_results_dump, f)

All Cross-Subjects


In [67]:
cs_combinations = list(itertools.combinations(unique_subjects, int(len(unique_subjects)/2)))
len(cs_combinations)


Out[67]:
924

In [68]:
cs_combinations_results_csv = "results/cs_combinations.csv"

In [69]:
open(cs_combinations_results_csv, "w").close()
with open(cs_combinations_results_csv, "a") as f:
    data_columns = pd.DataFrame(columns=["combination", "activity", "a1", "a2", "a3", "a4", "a5", "a6"])
    data_columns.to_csv(f, header=True, index=False)

In [70]:
for cs_combination_idx, cs_combination in enumerate(cs_combinations):
    print(cs_combination_idx, "... ", end="")

    # Get data
    combination_X_train, combination_y_train, combination_X_test, combination_y_test = crosssubject_test_split(
        features_df, labels_df, cs_combination)
    combination_train_xgbmatrix = xgb.DMatrix(combination_X_train, combination_y_train)
    combination_test_xgbmatrix = xgb.DMatrix(combination_X_test, combination_y_test)

    # Train
    combination_booster = xgb.train(params=XGB_PARAM_FINAL, dtrain=combination_train_xgbmatrix, num_boost_round=cs_validation.best_iteration+1)
    combination_y_predicted = combination_booster.predict(combination_test_xgbmatrix)
    
    # Get results
    combination_results = metrics.confusion_matrix(combination_y_test, combination_y_predicted)
    combination_results_df = pd.DataFrame(columns=["combination", "activity", "a1", "a2", "a3", "a4", "a5", "a6"])
    for activity_id, activity in enumerate(ACTIVITIES):
        combination_results_df.loc[activity_id] = [
            cs_combination_idx, activity, 
            combination_results[activity_id,0], combination_results[activity_id,1], combination_results[activity_id,2], 
            combination_results[activity_id,3], combination_results[activity_id,4], combination_results[activity_id,5]
        ]

    # Append results
    with open(cs_combinations_results_csv, "a") as f:
        combination_results_df.to_csv(f, header=False, index=False)


0 ... 1 ... 2 ... 3 ... 4 ... 5 ... 6 ... 7 ... 8 ... 9 ... 10 ... 11 ... 12 ... 13 ... 14 ... 15 ... 16 ... 17 ... 18 ... 19 ... 20 ... 21 ... 22 ... 23 ... 24 ... 25 ... 26 ... 27 ... 28 ... 29 ... 30 ... 31 ... 32 ... 33 ... 34 ... 35 ... 36 ... 37 ... 38 ... 39 ... 40 ... 41 ... 42 ... 43 ... 44 ... 45 ... 46 ... 47 ... 48 ... 49 ... 50 ... 51 ... 52 ... 53 ... 54 ... 55 ... 56 ... 57 ... 58 ... 59 ... 60 ... 61 ... 62 ... 63 ... 64 ... 65 ... 66 ... 67 ... 68 ... 69 ... 70 ... 71 ... 72 ... 73 ... 74 ... 75 ... 76 ... 77 ... 78 ... 79 ... 80 ... 81 ... 82 ... 83 ... 84 ... 85 ... 86 ... 87 ... 88 ... 89 ... 90 ... 91 ... 92 ... 93 ... 94 ... 95 ... 96 ... 97 ... 98 ... 99 ... 100 ... 101 ... 102 ... 103 ... 104 ... 105 ... 106 ... 107 ... 108 ... 109 ... 110 ... 111 ... 112 ... 113 ... 114 ... 115 ... 116 ... 117 ... 118 ... 119 ... 120 ... 121 ... 122 ... 123 ... 124 ... 125 ... 126 ... 127 ... 128 ... 129 ... 130 ... 131 ... 132 ... 133 ... 134 ... 135 ... 136 ... 137 ... 138 ... 139 ... 140 ... 141 ... 142 ... 143 ... 144 ... 145 ... 146 ... 147 ... 148 ... 149 ... 150 ... 151 ... 152 ... 153 ... 154 ... 155 ... 156 ... 157 ... 158 ... 159 ... 160 ... 161 ... 162 ... 163 ... 164 ... 165 ... 166 ... 167 ... 168 ... 169 ... 170 ... 171 ... 172 ... 173 ... 174 ... 175 ... 176 ... 177 ... 178 ... 179 ... 180 ... 181 ... 182 ... 183 ... 184 ... 185 ... 186 ... 187 ... 188 ... 189 ... 190 ... 191 ... 192 ... 193 ... 194 ... 195 ... 196 ... 197 ... 198 ... 199 ... 200 ... 201 ... 202 ... 203 ... 204 ... 205 ... 206 ... 207 ... 208 ... 209 ... 210 ... 211 ... 212 ... 213 ... 214 ... 215 ... 216 ... 217 ... 218 ... 219 ... 220 ... 221 ... 222 ... 223 ... 224 ... 225 ... 226 ... 227 ... 228 ... 229 ... 230 ... 231 ... 232 ... 233 ... 234 ... 235 ... 236 ... 237 ... 238 ... 239 ... 240 ... 241 ... 242 ... 243 ... 244 ... 245 ... 246 ... 247 ... 248 ... 249 ... 250 ... 251 ... 252 ... 253 ... 254 ... 255 ... 256 ... 257 ... 258 ... 259 ... 260 ... 261 ... 262 ... 263 ... 264 ... 265 ... 266 ... 267 ... 268 ... 269 ... 270 ... 271 ... 272 ... 273 ... 274 ... 275 ... 276 ... 277 ... 278 ... 279 ... 280 ... 281 ... 282 ... 283 ... 284 ... 285 ... 286 ... 287 ... 288 ... 289 ... 290 ... 291 ... 292 ... 293 ... 294 ... 295 ... 296 ... 297 ... 298 ... 299 ... 300 ... 301 ... 302 ... 303 ... 304 ... 305 ... 306 ... 307 ... 308 ... 309 ... 310 ... 311 ... 312 ... 313 ... 314 ... 315 ... 316 ... 317 ... 318 ... 319 ... 320 ... 321 ... 322 ... 323 ... 324 ... 325 ... 326 ... 327 ... 328 ... 329 ... 330 ... 331 ... 332 ... 333 ... 334 ... 335 ... 336 ... 337 ... 338 ... 339 ... 340 ... 341 ... 342 ... 343 ... 344 ... 345 ... 346 ... 347 ... 348 ... 349 ... 350 ... 351 ... 352 ... 353 ... 354 ... 355 ... 356 ... 357 ... 358 ... 359 ... 360 ... 361 ... 362 ... 363 ... 364 ... 365 ... 366 ... 367 ... 368 ... 369 ... 370 ... 371 ... 372 ... 373 ... 374 ... 375 ... 376 ... 377 ... 378 ... 379 ... 380 ... 381 ... 382 ... 383 ... 384 ... 385 ... 386 ... 387 ... 388 ... 389 ... 390 ... 391 ... 392 ... 393 ... 394 ... 395 ... 396 ... 397 ... 398 ... 399 ... 400 ... 401 ... 402 ... 403 ... 404 ... 405 ... 406 ... 407 ... 408 ... 409 ... 410 ... 411 ... 412 ... 413 ... 414 ... 415 ... 416 ... 417 ... 418 ... 419 ... 420 ... 421 ... 422 ... 423 ... 424 ... 425 ... 426 ... 427 ... 428 ... 429 ... 430 ... 431 ... 432 ... 433 ... 434 ... 435 ... 436 ... 437 ... 438 ... 439 ... 440 ... 441 ... 442 ... 443 ... 444 ... 445 ... 446 ... 447 ... 448 ... 449 ... 450 ... 451 ... 452 ... 453 ... 454 ... 455 ... 456 ... 457 ... 458 ... 459 ... 460 ... 461 ... 462 ... 463 ... 464 ... 465 ... 466 ... 467 ... 468 ... 469 ... 470 ... 471 ... 472 ... 473 ... 474 ... 475 ... 476 ... 477 ... 478 ... 479 ... 480 ... 481 ... 482 ... 483 ... 484 ... 485 ... 486 ... 487 ... 488 ... 489 ... 490 ... 491 ... 492 ... 493 ... 494 ... 495 ... 496 ... 497 ... 498 ... 499 ... 500 ... 501 ... 502 ... 503 ... 504 ... 505 ... 506 ... 507 ... 508 ... 509 ... 510 ... 511 ... 512 ... 513 ... 514 ... 515 ... 516 ... 517 ... 518 ... 519 ... 520 ... 521 ... 522 ... 523 ... 524 ... 525 ... 526 ... 527 ... 528 ... 529 ... 530 ... 531 ... 532 ... 533 ... 534 ... 535 ... 536 ... 537 ... 538 ... 539 ... 540 ... 541 ... 542 ... 543 ... 544 ... 545 ... 546 ... 547 ... 548 ... 549 ... 550 ... 551 ... 552 ... 553 ... 554 ... 555 ... 556 ... 557 ... 558 ... 559 ... 560 ... 561 ... 562 ... 563 ... 564 ... 565 ... 566 ... 567 ... 568 ... 569 ... 570 ... 571 ... 572 ... 573 ... 574 ... 575 ... 576 ... 577 ... 578 ... 579 ... 580 ... 581 ... 582 ... 583 ... 584 ... 585 ... 586 ... 587 ... 588 ... 589 ... 590 ... 591 ... 592 ... 593 ... 594 ... 595 ... 596 ... 597 ... 598 ... 599 ... 600 ... 601 ... 602 ... 603 ... 604 ... 605 ... 606 ... 607 ... 608 ... 609 ... 610 ... 611 ... 612 ... 613 ... 614 ... 615 ... 616 ... 617 ... 618 ... 619 ... 620 ... 621 ... 622 ... 623 ... 624 ... 625 ... 626 ... 627 ... 628 ... 629 ... 630 ... 631 ... 632 ... 633 ... 634 ... 635 ... 636 ... 637 ... 638 ... 639 ... 640 ... 641 ... 642 ... 643 ... 644 ... 645 ... 646 ... 647 ... 648 ... 649 ... 650 ... 651 ... 652 ... 653 ... 654 ... 655 ... 656 ... 657 ... 658 ... 659 ... 660 ... 661 ... 662 ... 663 ... 664 ... 665 ... 666 ... 667 ... 668 ... 669 ... 670 ... 671 ... 672 ... 673 ... 674 ... 675 ... 676 ... 677 ... 678 ... 679 ... 680 ... 681 ... 682 ... 683 ... 684 ... 685 ... 686 ... 687 ... 688 ... 689 ... 690 ... 691 ... 692 ... 693 ... 694 ... 695 ... 696 ... 697 ... 698 ... 699 ... 700 ... 701 ... 702 ... 703 ... 704 ... 705 ... 706 ... 707 ... 708 ... 709 ... 710 ... 711 ... 712 ... 713 ... 714 ... 715 ... 716 ... 717 ... 718 ... 719 ... 720 ... 721 ... 722 ... 723 ... 724 ... 725 ... 726 ... 727 ... 728 ... 729 ... 730 ... 731 ... 732 ... 733 ... 734 ... 735 ... 736 ... 737 ... 738 ... 739 ... 740 ... 741 ... 742 ... 743 ... 744 ... 745 ... 746 ... 747 ... 748 ... 749 ... 750 ... 751 ... 752 ... 753 ... 754 ... 755 ... 756 ... 757 ... 758 ... 759 ... 760 ... 761 ... 762 ... 763 ... 764 ... 765 ... 766 ... 767 ... 768 ... 769 ... 770 ... 771 ... 772 ... 773 ... 774 ... 775 ... 776 ... 777 ... 778 ... 779 ... 780 ... 781 ... 782 ... 783 ... 784 ... 785 ... 786 ... 787 ... 788 ... 789 ... 790 ... 791 ... 792 ... 793 ... 794 ... 795 ... 796 ... 797 ... 798 ... 799 ... 800 ... 801 ... 802 ... 803 ... 804 ... 805 ... 806 ... 807 ... 808 ... 809 ... 810 ... 811 ... 812 ... 813 ... 814 ... 815 ... 816 ... 817 ... 818 ... 819 ... 820 ... 821 ... 822 ... 823 ... 824 ... 825 ... 826 ... 827 ... 828 ... 829 ... 830 ... 831 ... 832 ... 833 ... 834 ... 835 ... 836 ... 837 ... 838 ... 839 ... 840 ... 841 ... 842 ... 843 ... 844 ... 845 ... 846 ... 847 ... 848 ... 849 ... 850 ... 851 ... 852 ... 853 ... 854 ... 855 ... 856 ... 857 ... 858 ... 859 ... 860 ... 861 ... 862 ... 863 ... 864 ... 865 ... 866 ... 867 ... 868 ... 869 ... 870 ... 871 ... 872 ... 873 ... 874 ... 875 ... 876 ... 877 ... 878 ... 879 ... 880 ... 881 ... 882 ... 883 ... 884 ... 885 ... 886 ... 887 ... 888 ... 889 ... 890 ... 891 ... 892 ... 893 ... 894 ... 895 ... 896 ... 897 ... 898 ... 899 ... 900 ... 901 ... 902 ... 903 ... 904 ... 905 ... 906 ... 907 ... 908 ... 909 ... 910 ... 911 ... 912 ... 913 ... 914 ... 915 ... 916 ... 917 ... 918 ... 919 ... 920 ... 921 ... 922 ... 923 ... 

In [71]:
combinations_results_df = pd.read_csv(cs_combinations_results_csv)

In [72]:
combinations_confusion_matrix = np.zeros((num_activities, num_activities))

for activity_idx, activity in enumerate(ACTIVITIES):
    combinations_activity_results = combinations_results_df[combinations_results_df["activity"] == activity]
    for accuracy_idx, accuracy_column in enumerate(["a1", "a2", "a3", "a4", "a5", "a6"]):
        combinations_confusion_matrix[activity_idx, accuracy_idx] = combinations_activity_results[accuracy_column].sum()

combinations_confusion_matrix_normalized = combinations_confusion_matrix.astype("float") / combinations_confusion_matrix.sum(axis=1)[:, np.newaxis]
combinations_confusion_matrix_normalized *= 100

In [73]:
all_samples = np.sum(combinations_confusion_matrix)

In [74]:
accurate_samples = 0
for activity_id in range(len(ACTIVITIES)):
    accurate_samples += combinations_confusion_matrix[activity_id, activity_id]

In [75]:
combinations_accuracy = accurate_samples / all_samples
combinations_accuracy


Out[75]:
0.89485413103376332

In [76]:
combinations_results_dump = {
    "accuracy": combinations_accuracy,
    "confusion_matrix": combinations_confusion_matrix_normalized,
}

In [77]:
with open("results/cs_combinations.pickle", "wb") as f:
    pickle.dump(combinations_results_dump, f)

DEMO


In [78]:
# X.shape

In [79]:
# y.shape

In [80]:
# demo_train_xgbmatrix = xgb.DMatrix(X, y)
# demo_test_xgbmatrix = xgb.DMatrix(X, y)
# demo_watchlist = [(demo_train_xgbmatrix, "train"), (demo_test_xgbmatrix, "eval")]

In [81]:
# demo_results = {}
# demo_booster = xgb.train(XGB_PARAM_DEMO, demo_train_xgbmatrix, XGB_NUM_ROUNDS_DEMO, demo_watchlist, evals_result=demo_results, early_stopping_rounds=20)

In [82]:
# demo_booster.save_model("demo-xgboost.model")

In [83]:
# bst2 = xgb.Booster(model_file="demo-xgboost.model")

In [84]:
# test_dmatrix = xgb.DMatrix(X)
# y_predicted = bst2.predict(test_dmatrix)
# accuracy = metrics.accuracy_score(y, y_predicted)

In [85]:
# accuracy